{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TensorFlow version: 1.5.0\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# Check that we have correct TensorFlow version installed.\n",
    "# Compare (major, minor) numerically: a plain string comparison would\n",
    "# wrongly rank \"1.10.0\" below \"1.5\".\n",
    "tf_version = tf.__version__\n",
    "print(\"TensorFlow version: {}\".format(tf_version))\n",
    "tf_major_minor = tuple(int(part) for part in tf_version.split(\".\")[:2])\n",
    "assert (1, 5) <= tf_major_minor, \"TensorFlow r1.5 or later is needed\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Emit INFO-level TensorFlow log messages.\n",
    "tf.logging.set_verbosity(tf.logging.INFO)\n",
    "\n",
    "# CSV files used for training and for evaluation, respectively.\n",
    "train_file = \"regression-train.csv\"\n",
    "test_file = \"regression-test.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names of the numeric input features, in the order they are paired\n",
    "# with the parsed CSV fields.\n",
    "numerical_feature_names = [\n",
    "    'PctUnder18',\n",
    "    'PctOver65',\n",
    "    'PctFemale',\n",
    "    'PctWhite',\n",
    "    'PctBachelors',\n",
    "    'PctDem',\n",
    "    'PctGop',\n",
    "]\n",
    "\n",
    "# One numeric feature column per feature name.\n",
    "feature_columns = list(\n",
    "    map(tf.feature_column.numeric_column, numerical_feature_names))\n",
    "\n",
    "def my_input_fn(file_path, repeat_count=200, batch_size=8, shuffle_buffer_size=256):\n",
    "    \"\"\"Build a tf.data pipeline of (features, label) batches from a CSV file.\n",
    "\n",
    "    Args:\n",
    "        file_path: Path of the CSV file. Each line is parsed as one float per\n",
    "            entry of numerical_feature_names, followed by the label.\n",
    "        repeat_count: Number of times the dataset is repeated.\n",
    "        batch_size: Number of examples per batch.\n",
    "        shuffle_buffer_size: Size of the shuffle buffer; pass None or 0 to\n",
    "            skip shuffling.\n",
    "\n",
    "    Returns:\n",
    "        A tf.data.Dataset of (dict feature name -> tensor, label tensor) batches.\n",
    "    \"\"\"\n",
    "    # One float default per feature plus one for the label, so the parser\n",
    "    # always stays in step with numerical_feature_names.\n",
    "    record_defaults = [[0.] for _ in range(len(numerical_feature_names) + 1)]\n",
    "\n",
    "    def decode_csv(line):\n",
    "        parsed_line = tf.decode_csv(line, record_defaults)\n",
    "        label = parsed_line[-1]  # Last element is the label\n",
    "        features = parsed_line[:-1]  # Everything but the last element are the features\n",
    "        return dict(zip(numerical_feature_names, features)), label\n",
    "\n",
    "    dataset = (tf.data.TextLineDataset(file_path)  # Read text file\n",
    "               .map(decode_csv))  # Transform each elem by applying decode_csv fn\n",
    "    if shuffle_buffer_size:\n",
    "        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)\n",
    "    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times\n",
    "    dataset = dataset.batch(batch_size)  # Batch size to use\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using default config.\n",
      "WARNING:tensorflow:Using temporary folder as model directory: /var/folders/1h/g9jk9_kx67d6g0_gyfnvk1n4008m_k/T/tmpinbxdb8_\n",
      "INFO:tensorflow:Using config: {'_model_dir': '/var/folders/1h/g9jk9_kx67d6g0_gyfnvk1n4008m_k/T/tmpinbxdb8_', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x121dc8e80>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Saving checkpoints for 1 into /var/folders/1h/g9jk9_kx67d6g0_gyfnvk1n4008m_k/T/tmpinbxdb8_/model.ckpt.\n",
      "INFO:tensorflow:loss = 1.0388168, step = 1\n",
      "INFO:tensorflow:global_step/sec: 630.799\n",
      "INFO:tensorflow:loss = 0.19403698, step = 101 (0.159 sec)\n",
      "INFO:tensorflow:global_step/sec: 815.889\n",
      "INFO:tensorflow:loss = 0.074764445, step = 201 (0.123 sec)\n",
      "INFO:tensorflow:global_step/sec: 970.082\n",
      "INFO:tensorflow:loss = 0.014307391, step = 301 (0.103 sec)\n",
      "INFO:tensorflow:global_step/sec: 797.569\n",
      "INFO:tensorflow:loss = 0.06200833, step = 401 (0.125 sec)\n",
      "INFO:tensorflow:global_step/sec: 875.119\n",
      "INFO:tensorflow:loss = 0.09571688, step = 501 (0.114 sec)\n",
      "INFO:tensorflow:global_step/sec: 869.347\n",
      "INFO:tensorflow:loss = 0.029630365, step = 601 (0.115 sec)\n",
      "INFO:tensorflow:global_step/sec: 852.931\n",
      "INFO:tensorflow:loss = 0.05036062, step = 701 (0.117 sec)\n",
      "INFO:tensorflow:global_step/sec: 876.509\n",
      "INFO:tensorflow:loss = 0.049179126, step = 801 (0.115 sec)\n",
      "INFO:tensorflow:global_step/sec: 859.024\n",
      "INFO:tensorflow:loss = 0.018018425, step = 901 (0.116 sec)\n",
      "INFO:tensorflow:global_step/sec: 854.469\n",
      "INFO:tensorflow:loss = 0.0358577, step = 1001 (0.117 sec)\n",
      "INFO:tensorflow:global_step/sec: 852.95\n",
      "INFO:tensorflow:loss = 0.015942447, step = 1101 (0.117 sec)\n",
      "INFO:tensorflow:global_step/sec: 847.795\n",
      "INFO:tensorflow:loss = 0.054604523, step = 1201 (0.118 sec)\n",
      "INFO:tensorflow:global_step/sec: 879.246\n",
      "INFO:tensorflow:loss = 0.0369962, step = 1301 (0.114 sec)\n",
      "INFO:tensorflow:global_step/sec: 861.149\n",
      "INFO:tensorflow:loss = 0.05440944, step = 1401 (0.116 sec)\n",
      "INFO:tensorflow:global_step/sec: 850.413\n",
      "INFO:tensorflow:loss = 0.020052407, step = 1501 (0.118 sec)\n",
      "INFO:tensorflow:global_step/sec: 884.391\n",
      "INFO:tensorflow:loss = 0.02036947, step = 1601 (0.113 sec)\n",
      "INFO:tensorflow:global_step/sec: 864.131\n",
      "INFO:tensorflow:loss = 0.011700297, step = 1701 (0.116 sec)\n",
      "INFO:tensorflow:global_step/sec: 880.66\n",
      "INFO:tensorflow:loss = 0.015125519, step = 1801 (0.114 sec)\n",
      "INFO:tensorflow:global_step/sec: 850.739\n",
      "INFO:tensorflow:loss = 0.011621947, step = 1901 (0.118 sec)\n",
      "INFO:tensorflow:global_step/sec: 871.703\n",
      "INFO:tensorflow:loss = 0.010234773, step = 2001 (0.115 sec)\n",
      "INFO:tensorflow:global_step/sec: 824.886\n",
      "INFO:tensorflow:loss = 0.012169201, step = 2101 (0.121 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 2188 into /var/folders/1h/g9jk9_kx67d6g0_gyfnvk1n4008m_k/T/tmpinbxdb8_/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 0.008745724.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x121dc8cc0>"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: the variable is named `classifier`, but the estimator is a linear\n",
    "# regression model; the name is kept because other cells refer to it.\n",
    "# A fixed graph-level random seed is set so that the shuffled input order,\n",
    "# and therefore the training run, is repeatable across executions.\n",
    "classifier = tf.estimator.LinearRegressor(\n",
    "    feature_columns=feature_columns,\n",
    "    config=tf.estimator.RunConfig(tf_random_seed=42))\n",
    "\n",
    "# Run training for 7 epochs (7 times through our entire dataset)\n",
    "# You can experiment with this value for your own dataset\n",
    "classifier.train(\n",
    "    input_fn=lambda: my_input_fn(train_file, 7))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Starting evaluation at 2018-02-22-18:04:47\n",
      "INFO:tensorflow:Restoring parameters from /var/folders/1h/g9jk9_kx67d6g0_gyfnvk1n4008m_k/T/tmpinbxdb8_/model.ckpt-2188\n",
      "INFO:tensorflow:Finished evaluation at 2018-02-22-18:04:47\n",
      "INFO:tensorflow:Saving dict for global step 2188: average_loss = 0.001390136, global_step = 2188, loss = 0.011048873\n",
      "average_loss: 0.001390136\n",
      "global_step: 2188\n",
      "loss: 0.011048873\n"
     ]
    }
   ],
   "source": [
    "# Evaluate on the test file (a single pass) and list the metrics by name.\n",
    "results = classifier.evaluate(input_fn=lambda: my_input_fn(test_file, 1))\n",
    "for metric_name, metric_value in sorted(results.items()):\n",
    "    print('%s: %s' % (metric_name, metric_value))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate predictions on 3 counties.\n",
    "# Each list holds one value per county; index i in every list refers to county i.\n",
    "prediction_input = {\n",
    "    'PctUnder18': [23.9, 25.7, 10.6],\n",
    "    'PctOver65': [17.6, 24.7, 15.8],\n",
    "    'PctFemale': [50.0, 48.5, 53.5],\n",
    "    'PctWhite': [0.965, 0.97, 0.75],\n",
    "    'PctBachelors': [12.7, 17.0, 49.8],\n",
    "    'PctDem': [0.3227832512315271, 0.09475032010243278, 0.6346801346801347],\n",
    "    'PctGop': [0.6545566502463054, 0.8911651728553138, 0.3468013468013468],\n",
    "}\n",
    "\n",
    "def test_input_fn():\n",
    "    \"\"\"Return a Dataset holding prediction_input as its single element.\"\"\"\n",
    "    return tf.data.Dataset.from_tensors(prediction_input)\n",
    "\n",
    "# Predict all our prediction_input.\n",
    "# predict() yields its results lazily from a one-shot generator; collect them\n",
    "# in a list so they can be iterated again (e.g. when a later cell is re-run).\n",
    "pred_results = list(classifier.predict(input_fn=test_input_fn))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Restoring parameters from /var/folders/1h/g9jk9_kx67d6g0_gyfnvk1n4008m_k/T/tmpinbxdb8_/model.ckpt-2188\n",
      "(0, {'predictions': array([0.2305259], dtype=float32)})\n",
      "(1, {'predictions': array([0.03020383], dtype=float32)})\n",
      "(2, {'predictions': array([0.68529695], dtype=float32)})\n"
     ]
    }
   ],
   "source": [
    "# Actual values for the raw prediction data:\n",
    "# 1) 23% Clinton\n",
    "# 2) 5% Clinton\n",
    "# 3) 69% Clinton\n",
    "\n",
    "# Print every prediction together with its running index.\n",
    "for indexed_prediction in enumerate(pred_results):\n",
    "    print(indexed_prediction)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
